From 57b90ce69638ae72876f4c8d81072945fdf9c0b8 Mon Sep 17 00:00:00 2001
From: "kaf24@scramble.cl.cam.ac.uk" <kaf24@scramble.cl.cam.ac.uk>
Date: Sun, 16 Nov 2003 23:44:13 +0000
Subject: [PATCH] bitkeeper revision 1.607 (3fb80bcdDsC2bacgLhXMLo9Gck9Icg)

dev.c, netdevice.h, interrupt.h, xen_block.c, memory.c, mm.c:
  Clean up locking in blkdev and net code. Many locks are no longer ever held in interrupt context.
---
 xen/arch/i386/mm.c            |  12 ++--
 xen/common/memory.c           |  19 +++--
 xen/drivers/block/xen_block.c |  99 +++++++++++++++++---------
 xen/include/xeno/interrupt.h  |  18 ++---
 xen/include/xeno/netdevice.h  |  34 +++++++--
 xen/net/dev.c                 | 126 +++++++++++++++++++++-------------
 6 files changed, 195 insertions(+), 113 deletions(-)

diff --git a/xen/arch/i386/mm.c b/xen/arch/i386/mm.c
index 0546f53582..a51ac43a23 100644
--- a/xen/arch/i386/mm.c
+++ b/xen/arch/i386/mm.c
@@ -213,12 +213,12 @@ long set_gdt(struct task_struct *p,
 {
     /* NB. There are 512 8-byte entries per GDT page. */
     unsigned int i, j, nr_pages = (entries + 511) / 512;
-    unsigned long pfn, *gdt_page, flags;
+    unsigned long pfn, *gdt_page;
     long ret = -EINVAL;
     struct pfn_info *page;
     struct desc_struct *vgdt;
 
-    spin_lock_irqsave(&p->page_lock, flags);
+    spin_lock(&p->page_lock);
 
     /* Check the new GDT. */
     for ( i = 0; i < nr_pages; i++ )
@@ -284,7 +284,7 @@ long set_gdt(struct task_struct *p,
     ret = 0; /* success */
 
  out:
-    spin_unlock_irqrestore(&p->page_lock, flags);
+    spin_unlock(&p->page_lock);
     return ret;
 }
 
@@ -314,14 +314,14 @@ long do_set_gdt(unsigned long *frame_list, unsigned int entries)
 long do_update_descriptor(
     unsigned long pa, unsigned long word1, unsigned long word2)
 {
-    unsigned long *gdt_pent, flags, pfn = pa >> PAGE_SHIFT;
+    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT;
     struct pfn_info *page;
     long ret = -EINVAL;
 
     if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) )
         return -EINVAL;
 
-    spin_lock_irqsave(&current->page_lock, flags);
+    spin_lock(&current->page_lock);
 
     page = frame_table + pfn;
     if ( (page->flags & PG_domain_mask) != current->domain )
@@ -353,6 +353,6 @@ long do_update_descriptor(
     ret = 0; /* success */
 
  out:
-    spin_unlock_irqrestore(&current->page_lock, flags);
+    spin_unlock(&current->page_lock);
     return ret;
 }
diff --git a/xen/common/memory.c b/xen/common/memory.c
index c2349d3240..01c846542a 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -132,6 +132,7 @@
 #include <xeno/sched.h>
 #include <xeno/errno.h>
 #include <xeno/perfc.h>
+#include <xeno/interrupt.h>
 #include <asm/page.h>
 #include <asm/flushtlb.h>
 #include <asm/io.h>
@@ -253,11 +254,15 @@ int map_ldt_shadow_page(unsigned int off)
 {
     struct task_struct *p = current;
     unsigned long addr = p->mm.ldt_base + (off << PAGE_SHIFT);
-    unsigned long l1e, *ldt_page, flags;
+    unsigned long l1e, *ldt_page;
     struct pfn_info *page;
     int i, ret = -1;
 
-    spin_lock_irqsave(&p->page_lock, flags);
+    /* We cannot take a page_lock in interrupt context. */
+    if ( in_interrupt() )
+        BUG();
+
+    spin_lock(&p->page_lock);
 
     __get_user(l1e, (unsigned long *)(linear_pg_table+(addr>>PAGE_SHIFT)));
     if ( unlikely(!(l1e & _PAGE_PRESENT)) )
@@ -294,7 +299,7 @@ int map_ldt_shadow_page(unsigned int off)
     ret = 0;
 
  out:
-    spin_unlock_irqrestore(&p->page_lock, flags);
+    spin_unlock(&p->page_lock);
     return ret;
 }
 
@@ -865,7 +870,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
 
         err = 1;
 
-        spin_lock_irq(&current->page_lock);
+        spin_lock(&current->page_lock);
 
         /* Get the page-frame number that a non-extended command references. */
         if ( (cmd == MMU_NORMAL_PT_UPDATE) || 
@@ -974,7 +979,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
         }
 
     unlock:
-        spin_unlock_irq(&current->page_lock);
+        spin_unlock(&current->page_lock);
 
         if ( unlikely(err) )
         {
@@ -1015,7 +1020,7 @@ int do_update_va_mapping(unsigned long page_nr,
     if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
         goto out;
 
-    spin_lock_irq(&p->page_lock);
+    spin_lock(&p->page_lock);
 
     /* Check that the VA's page-directory entry is present.. */
     if ( unlikely((err = __get_user(_x, (unsigned long *)
@@ -1047,7 +1052,7 @@ int do_update_va_mapping(unsigned long page_nr,
     if ( unlikely(cr0 != 0) )
         write_cr0(cr0);
  unlock_and_out:
-    spin_unlock_irq(&p->page_lock);
+    spin_unlock(&p->page_lock);
  out:
     return err;
 }
diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c
index bc4dad260b..6172fd6858 100644
--- a/xen/drivers/block/xen_block.c
+++ b/xen/drivers/block/xen_block.c
@@ -58,6 +58,8 @@ static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
 static kmem_cache_t *buffer_head_cachep;
 static atomic_t nr_pending;
 
+static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
+
 static int __buffer_is_valid(struct task_struct *p, 
                              unsigned long buffer, 
                              unsigned short size,
@@ -166,41 +168,68 @@ static void maybe_trigger_io_schedule(void)
 
 /******************************************************************
  * COMPLETION CALLBACK -- Called as bh->b_end_io()
- * NB. This can be called from interrupt context!
  */
 
+static void end_block_io_op_softirq(struct softirq_action *h)
+{
+    pending_req_t *pending_req;
+    struct buffer_head *bh, *nbh;
+    unsigned int cpu = smp_processor_id();
+    /* BLKDEV_RESPONSE_SOFTIRQ handler: finish bhs queued by end_block_io_op(). */
+    local_irq_disable();
+    bh = completed_bhs[cpu];
+    completed_bhs[cpu] = NULL;
+    local_irq_enable();
+    /* The detached list is linked through b_reqnext; walk it with IRQs enabled. */
+    while ( bh != NULL )
+    {
+        pending_req = bh->pending_req;
+        /* Drop the page references held on the frames backing this buffer. */
+        unlock_buffer(pending_req->domain, 
+                      virt_to_phys(bh->b_data), 
+                      bh->b_size, 
+                      (pending_req->operation==READ));
+        /* Last outstanding bh of the request: respond and recycle the slot. */
+        if ( atomic_dec_and_test(&pending_req->pendcnt) )
+        {
+            make_response(pending_req->domain, pending_req->id,
+                          pending_req->operation, pending_req->status);
+            put_task_struct(pending_req->domain);
+            spin_lock(&pend_prod_lock);
+            pending_ring[pending_prod] = pending_req - pending_reqs;
+            PENDREQ_IDX_INC(pending_prod);
+            spin_unlock(&pend_prod_lock);
+            atomic_dec(&nr_pending);
+            maybe_trigger_io_schedule();
+        }
+        /* Fetch the successor before bh is returned to the cache. */
+        nbh = bh->b_reqnext;
+        kmem_cache_free(buffer_head_cachep, bh);
+        bh = nbh;
+    }
+}
+
 static void end_block_io_op(struct buffer_head *bh, int uptodate)
 {
     unsigned long flags;
-    pending_req_t *pending_req = bh->pending_req;
+    unsigned int cpu = smp_processor_id();
 
     /* An error fails the entire request. */
     if ( !uptodate )
     {
         DPRINTK("Buffer not up-to-date at end of operation\n");
-        pending_req->status = 2;
+        bh->pending_req->status = 2;
     }
 
-    unlock_buffer(pending_req->domain, 
-                  virt_to_phys(bh->b_data), 
-                  bh->b_size, 
-                  (pending_req->operation==READ));
-
-    if ( atomic_dec_and_test(&pending_req->pendcnt) )
-    {
-        make_response(pending_req->domain, pending_req->id,
-                      pending_req->operation, pending_req->status);
-        put_task_struct(pending_req->domain);
-        spin_lock_irqsave(&pend_prod_lock, flags);
-        pending_ring[pending_prod] = pending_req - pending_reqs;
-        PENDREQ_IDX_INC(pending_prod);
-        spin_unlock_irqrestore(&pend_prod_lock, flags);
-        atomic_dec(&nr_pending);
-        maybe_trigger_io_schedule();
-    }
+    local_irq_save(flags);
+    bh->b_reqnext = completed_bhs[cpu];
+    completed_bhs[cpu] = bh;
+    local_irq_restore(flags);
 
-    kmem_cache_free(buffer_head_cachep, bh);
+    __cpu_raise_softirq(cpu, BLKDEV_RESPONSE_SOFTIRQ);
 }
+
+
 /* ----[ Syscall Interface ]------------------------------------------------*/
 
 long do_block_io_op(block_io_op_t *u_block_io_op)
@@ -364,10 +393,10 @@ static void unlock_buffer(struct task_struct *p,
                           unsigned short size,
                           int writeable_buffer)
 {
-    unsigned long    pfn, flags;
+    unsigned long    pfn;
     struct pfn_info *page;
 
-    spin_lock_irqsave(&p->page_lock, flags);
+    spin_lock(&p->page_lock);
     for ( pfn = buffer >> PAGE_SHIFT; 
           pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
           pfn++ )
@@ -377,7 +406,7 @@ static void unlock_buffer(struct task_struct *p,
             put_page_type(page);
         put_page_tot(page);
     }
-    spin_unlock_irqrestore(&p->page_lock, flags);
+    spin_unlock(&p->page_lock);
 }
 
 static int do_block_io_op_domain(struct task_struct *p, int max_to_do)
@@ -438,7 +467,7 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
     struct buffer_head *bh;
     int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
     unsigned short nr_sects;
-    unsigned long buffer, flags;
+    unsigned long buffer;
     int i, tot_sects;
     pending_req_t *pending_req;
 
@@ -446,7 +475,7 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
     int new_segs, nr_psegs = 0;
     phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
 
-    spin_lock_irqsave(&p->page_lock, flags);
+    spin_lock(&p->page_lock);
 
     /* Check that number of segments is sane. */
     if ( (req->nr_segments == 0) || (req->nr_segments > MAX_BLK_SEGS) )
@@ -516,7 +545,7 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
     for ( i = 0; i < nr_psegs; i++ )
         __lock_buffer(phys_seg[i].buffer, phys_seg[i].nr_sects<<9, 
                       (operation==READ));
-    spin_unlock_irqrestore(&p->page_lock, flags);
+    spin_unlock(&p->page_lock);
 
     atomic_inc(&nr_pending);
     pending_req = pending_reqs + pending_ring[pending_cons];
@@ -560,7 +589,7 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
     return;
 
  bad_descriptor:
-    spin_unlock_irqrestore(&p->page_lock, flags);
+    spin_unlock(&p->page_lock);
     make_response(p, req->id, req->operation, 1);
 } 
 
@@ -574,19 +603,19 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
 static void make_response(struct task_struct *p, unsigned long id, 
 			  unsigned short op, unsigned long st)
 {
-    unsigned long cpu_mask, flags;
+    unsigned long cpu_mask;
     int position;
     blk_ring_t *blk_ring;
 
     /* Place on the response ring for the relevant domain. */ 
-    spin_lock_irqsave(&p->blk_ring_lock, flags);
+    spin_lock(&p->blk_ring_lock);
     blk_ring = p->blk_ring_base;
     position = p->blk_resp_prod;
     blk_ring->ring[position].resp.id        = id;
     blk_ring->ring[position].resp.operation = op;
     blk_ring->ring[position].resp.status    = st;
     p->blk_resp_prod = blk_ring->resp_prod = BLK_RING_INC(position);
-    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+    spin_unlock(&p->blk_ring_lock);
     
     /* Kick the relevant domain. */
     cpu_mask = mark_guest_event(p, _EVENT_BLKDEV);
@@ -659,7 +688,13 @@ void initialize_block_io ()
     atomic_set(&nr_pending, 0);
     pending_prod = pending_cons = 0;
     memset(pending_reqs, 0, sizeof(pending_reqs));
-    for ( i = 0; i < MAX_PENDING_REQS; i++ ) pending_ring[i] = i;
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+    
+    for ( i = 0; i < NR_CPUS; i++ )
+        completed_bhs[i] = NULL;
+        
+    open_softirq(BLKDEV_RESPONSE_SOFTIRQ, end_block_io_op_softirq, NULL);
 
     spin_lock_init(&io_schedule_list_lock);
     INIT_LIST_HEAD(&io_schedule_list);
diff --git a/xen/include/xeno/interrupt.h b/xen/include/xeno/interrupt.h
index c37e4efd73..4af244da05 100644
--- a/xen/include/xeno/interrupt.h
+++ b/xen/include/xeno/interrupt.h
@@ -21,33 +21,23 @@ struct irqaction {
 };
 
 
-/* Who gets which entry in bh_base.  Things which will occur most often
-   should come first */
-   
 enum {
 	TIMER_BH = 0,
-	TQUEUE_BH,
-	SCSI_BH,
-	IMMEDIATE_BH
+	SCSI_BH
 };
 
 #include <asm/hardirq.h>
 #include <asm/softirq.h>
 
 
-
-/* PLEASE, avoid to allocate new softirqs, if you need not _really_ high
-   frequency threaded job scheduling. For almost all the purposes
-   tasklets are more than enough. F.e. all serial device BHs et
-   al. should be converted to tasklets, not to softirqs.
- */
-
 enum
 {
 	HI_SOFTIRQ=0,
 	NET_RX_SOFTIRQ,
 	AC_TIMER_SOFTIRQ,
-	TASKLET_SOFTIRQ
+	TASKLET_SOFTIRQ,
+        BLKDEV_RESPONSE_SOFTIRQ,
+        NET_TX_SOFTIRQ
 };
 
 /* softirq mask and active fields moved to irq_cpustat_t in
diff --git a/xen/include/xeno/netdevice.h b/xen/include/xeno/netdevice.h
index de639a884d..ea7f56e38e 100644
--- a/xen/include/xeno/netdevice.h
+++ b/xen/include/xeno/netdevice.h
@@ -40,6 +40,12 @@
 
 struct vlan_group;
 
+extern struct skb_completion_queues {
+    struct sk_buff *rx; /* Packets received in interrupt context. */
+    unsigned int rx_qlen;
+    struct sk_buff *tx; /* Tx buffers defunct in interrupt context. */
+} skb_queue[NR_CPUS] __cacheline_aligned;
+
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0   /* keep 'em coming, baby */
 #define NET_RX_DROP		1  /* packet dropped */
@@ -453,12 +459,30 @@ static inline int netif_running(struct net_device *dev)
 }
 
 
-/*
- * Xen does not need deferred skb freeing, as all destructor hook functions 
- * are IRQ safe. Linux needed more care for some destructors...
+/* Use this variant when it is known for sure that it
+ * is executing from interrupt context.
+ */
+static inline void dev_kfree_skb_irq(struct sk_buff *skb)
+{
+	int cpu = smp_processor_id();
+	unsigned long flags;
+	local_irq_save(flags);	/* list head is updated with local IRQs off */
+	skb->next = skb_queue[cpu].tx;	/* push onto this CPU's defunct-tx list */
+	skb_queue[cpu].tx = skb;
+	__cpu_raise_softirq(cpu, NET_TX_SOFTIRQ);	/* actual free is deferred to the NET_TX_SOFTIRQ handler */
+	local_irq_restore(flags);
+}
+
+/* Use this variant in places where it could be invoked
+ * either from interrupt or non-interrupt context.
  */
-#define dev_kfree_skb_irq(_skb) dev_kfree_skb(_skb)
-#define dev_kfree_skb_any(_skb) dev_kfree_skb(_skb)
+static inline void dev_kfree_skb_any(struct sk_buff *skb)
+{
+	if (in_irq())
+		dev_kfree_skb_irq(skb);
+	else
+		dev_kfree_skb(skb);
+}
 
 extern void		net_call_rx_atomic(void (*fn)(void));
 extern int		netif_rx(struct sk_buff *skb);
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 3ecec620e7..e7b9f2d01c 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -50,7 +50,7 @@
 #define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
 #define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
 
-static struct sk_buff_head rx_skb_queue[NR_CPUS] __cacheline_aligned;
+struct skb_completion_queues skb_queue[NR_CPUS] __cacheline_aligned;
 
 static int get_tx_bufs(net_vif_t *vif);
 
@@ -607,35 +607,40 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
 
 int netif_rx(struct sk_buff *skb)
 {
-    int this_cpu = smp_processor_id();
-    struct sk_buff_head *q = &rx_skb_queue[this_cpu];
+    int cpu = smp_processor_id();
     unsigned long flags;
 
-    /* This oughtn't to happen, really! */
-    if ( unlikely(skb_queue_len(q) > 100) )
+    local_irq_save(flags);
+    /* Test and update this CPU's rx list with interrupts disabled. */
+    if ( unlikely(skb_queue[cpu].rx_qlen > 100) )
     {
+        local_irq_restore(flags);
         perfc_incr(net_rx_congestion_drop);
         return NET_RX_DROP;
     }
 
-    local_irq_save(flags);
-    __skb_queue_tail(q, skb);
+    skb->next = skb_queue[cpu].rx;
+    skb_queue[cpu].rx = skb;
+    skb_queue[cpu].rx_qlen++; /* without this the >100 congestion check never fires */
     local_irq_restore(flags);
 
-    __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
+    __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ);
 
     return NET_RX_SUCCESS;
 }
 
 static void net_rx_action(struct softirq_action *h)
 {
-    int offset, this_cpu = smp_processor_id();
-    struct sk_buff_head *q = &rx_skb_queue[this_cpu];
-    struct sk_buff *skb;
+    int offset, cpu = smp_processor_id();
+    struct sk_buff *skb, *nskb;
 
     local_irq_disable();
-    
-    while ( (skb = __skb_dequeue(q)) != NULL )
+    skb = skb_queue[cpu].rx;
+    skb_queue[cpu].rx = NULL;
+    skb_queue[cpu].rx_qlen = 0;
+    local_irq_enable();
+
+    while ( skb != NULL )
     {
         ASSERT(skb->skb_type == SKB_ZERO_COPY);
 
@@ -652,7 +657,7 @@ static void net_rx_action(struct softirq_action *h)
         skb_push(skb, ETH_HLEN);
         skb->mac.raw = skb->data;
         
-        netdev_rx_stat[this_cpu].total++;
+        netdev_rx_stat[cpu].total++;
         
         if ( skb->dst_vif == NULL )
             skb->dst_vif = net_get_target_vif(
@@ -668,10 +673,11 @@ static void net_rx_action(struct softirq_action *h)
         }
 
         unmap_domain_mem(skb->head);
+
+        nskb = skb->next;
         kfree_skb(skb);
+        skb = nskb;
     }
-
-    local_irq_enable();
 }
 
 
@@ -823,39 +829,58 @@ static inline void maybe_schedule_tx_action(void)
 }
 
 
+static void net_tx_gc(struct softirq_action *h)
+{
+    int cpu = smp_processor_id();
+    struct sk_buff *skb, *nskb;
+    /* NET_TX_SOFTIRQ handler: free skbs queued on skb_queue[cpu].tx. */
+    local_irq_disable();
+    skb = skb_queue[cpu].tx;
+    skb_queue[cpu].tx = NULL;
+    local_irq_enable();
+    /* The list was detached above; free each skb with interrupts enabled. */
+    while ( skb != NULL )
+    {
+        nskb = skb->next;
+        __kfree_skb(skb);
+        skb = nskb;
+    }
+}
+
 /* Destructor function for tx skbs. */
 static void tx_skb_release(struct sk_buff *skb)
 {
     int i;
-    net_vif_t *vif = skb->src_vif;
-    unsigned long flags;
+    net_vif_t *vif;
+
+    vif = skb->src_vif;
     
-    spin_lock_irqsave(&vif->domain->page_lock, flags);
+    spin_lock(&vif->domain->page_lock);
     for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
         put_page_tot(skb_shinfo(skb)->frags[i].page);
-    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
-
+    spin_unlock(&vif->domain->page_lock);
+    
     if ( skb->skb_type == SKB_NODATA )
         kmem_cache_free(net_header_cachep, skb->head);
-
+    
     skb_shinfo(skb)->nr_frags = 0; 
-
-    spin_lock_irqsave(&vif->tx_lock, flags);
+    
+    spin_lock(&vif->tx_lock);
     __make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
-    spin_unlock_irqrestore(&vif->tx_lock, flags);
-
+    spin_unlock(&vif->tx_lock);
+    
     /*
-     * Checks below must happen after the above response is posted.
-     * This avoids a possible race with a guest OS on another CPU.
+     * Checks below must happen after the above response is posted. This avoids
+     * a possible race with a guest OS on another CPU.
      */
     smp_mb();
-
+    
     if ( (vif->tx_cons == vif->tx_prod) && get_tx_bufs(vif) )
     {
         add_to_net_schedule_list_tail(vif);
         maybe_schedule_tx_action();        
     }
-
+    
     put_vif(vif);
 }
 
@@ -1849,12 +1874,11 @@ static int get_tx_bufs(net_vif_t *vif)
     struct sk_buff     *skb;
     tx_req_entry_t      tx;
     int                 i, j, ret = 0;
-    unsigned long       flags;
 
     if ( vif->tx_req_cons == shared_idxs->tx_req_prod )
         return 0;
 
-    spin_lock_irqsave(&vif->tx_lock, flags);
+    spin_lock(&vif->tx_lock);
 
     /* Currently waiting for more credit? */
     if ( vif->remaining_credit == 0 )
@@ -2013,7 +2037,7 @@ static int get_tx_bufs(net_vif_t *vif)
         vif->tx_prod = j;
 
  out:
-    spin_unlock_irqrestore(&vif->tx_lock, flags);
+    spin_unlock(&vif->tx_lock);
 
     return ret;
 }
@@ -2063,14 +2087,14 @@ static long get_bufs_from_vif(net_vif_t *vif)
         pte_pfn = rx.addr >> PAGE_SHIFT;
         pte_page = frame_table + pte_pfn;
             
-        spin_lock_irq(&p->page_lock);
+        spin_lock(&p->page_lock);
         if ( (pte_pfn >= max_page) || 
              ((pte_page->flags & (PG_type_mask | PG_domain_mask)) != 
               (PGT_l1_page_table | p->domain)) ) 
         {
             DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
                     p->domain, pte_pfn, max_page, pte_page->flags);
-            spin_unlock_irq(&p->page_lock);
+            spin_unlock(&p->page_lock);
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             continue;
         }
@@ -2117,7 +2141,7 @@ static long get_bufs_from_vif(net_vif_t *vif)
             
     rx_unmap_and_continue:
         unmap_domain_mem(ptep);
-        spin_unlock_irq(&p->page_lock);
+        spin_unlock(&p->page_lock);
     }
 
     vif->rx_req_cons = i;
@@ -2135,7 +2159,7 @@ static long get_bufs_from_vif(net_vif_t *vif)
 long flush_bufs_for_vif(net_vif_t *vif)
 {
     int i;
-    unsigned long *pte, flags;
+    unsigned long *pte;
     struct pfn_info *page;
     struct task_struct *p = vif->domain;
     rx_shadow_entry_t *rx;
@@ -2143,7 +2167,7 @@ long flush_bufs_for_vif(net_vif_t *vif)
     net_idx_t *shared_idxs = vif->shared_idxs;
 
     /* Return any outstanding receive buffers to the guest OS. */
-    spin_lock_irqsave(&p->page_lock, flags);
+    spin_lock(&p->page_lock);
     for ( i = vif->rx_req_cons; 
           (i != shared_idxs->rx_req_prod) && 
               (((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1); 
@@ -2181,13 +2205,13 @@ long flush_bufs_for_vif(net_vif_t *vif)
         make_rx_response(vif, rx->id, 0, RING_STATUS_DROPPED, 0);
     }
     vif->rx_cons = i;
-    spin_unlock_irqrestore(&p->page_lock, flags);
+    spin_unlock(&p->page_lock);
 
     /*
      * Flush pending transmit buffers. The guest may still have to wait for
      * buffers that are queued at a physical NIC.
      */
-    spin_lock_irqsave(&vif->tx_lock, flags);
+    spin_lock(&vif->tx_lock);
     for ( i = vif->tx_req_cons; 
           (i != shared_idxs->tx_req_prod) && 
               (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 
@@ -2197,7 +2221,7 @@ long flush_bufs_for_vif(net_vif_t *vif)
                            RING_STATUS_DROPPED);
     }
     vif->tx_req_cons = i;
-    spin_unlock_irqrestore(&vif->tx_lock, flags);
+    spin_unlock(&vif->tx_lock);
 
     return 0;
 }
@@ -2236,7 +2260,7 @@ long do_net_io_op(netop_t *uop)
 
     case NETOP_RESET_RINGS:
         /* We take the tx_lock to avoid a race with get_tx_bufs. */
-        spin_lock_irq(&vif->tx_lock);
+        spin_lock(&vif->tx_lock);
         if ( (vif->rx_req_cons != vif->rx_resp_prod) || 
              (vif->tx_req_cons != vif->tx_resp_prod) )
         {
@@ -2249,7 +2273,7 @@ long do_net_io_op(netop_t *uop)
             vif->tx_req_cons = vif->tx_resp_prod = 0;
             ret = 0;
         }
-        spin_unlock_irq(&vif->tx_lock);
+        spin_unlock(&vif->tx_lock);
         break;
 
     case NETOP_GET_VIF_INFO:
@@ -2297,12 +2321,11 @@ static void make_rx_response(net_vif_t     *vif,
                              unsigned char  st,
                              unsigned char  off)
 {
-    unsigned long flags;
     unsigned int pos;
     rx_resp_entry_t *resp;
 
     /* Place on the response ring for the relevant domain. */ 
-    spin_lock_irqsave(&vif->rx_lock, flags);
+    spin_lock(&vif->rx_lock);
     pos  = vif->rx_resp_prod;
     resp = &vif->shared_rings->rx_ring[pos].resp;
     resp->id     = id;
@@ -2317,19 +2340,24 @@ static void make_rx_response(net_vif_t     *vif,
         unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET);
         guest_event_notify(cpu_mask);    
     }
-    spin_unlock_irqrestore(&vif->rx_lock, flags);
+    spin_unlock(&vif->rx_lock);
 }
 
 
 int setup_network_devices(void)
 {
-    int i, ret;
+    int ret;
     extern char opt_ifname[];
 
-    for ( i = 0; i < smp_num_cpus; i++ )
-        skb_queue_head_init(&rx_skb_queue[i]);
+    memset(skb_queue, 0, sizeof(skb_queue));
 
+    /* Actual receive processing happens in softirq context. */
     open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+
+    /* Processing of defunct transmit buffers happens in softirq context. */
+    open_softirq(NET_TX_SOFTIRQ, net_tx_gc, NULL);
+
+    /* Transmit scheduling happens in a tasklet to exclude other processors. */
     tasklet_enable(&net_tx_tasklet);
 
     if ( (the_dev = dev_get_by_name(opt_ifname)) == NULL ) 
-- 
2.30.2